import tabula

# Location of the PDF file to extract tables from.
file_path = "/home/FAST_DATA_MIRROR/Langchain-Chatchat-master/云鹰平台服务系统接口协议V1.8.4.pdf"


def _format_cell(value):
    """Return the text printed for a single table cell.

    Strings have the ``'Unnamed:'`` and ``'Name:'`` markers removed, floats
    that hold a whole number are rendered without the trailing ``.0``, and
    every other value (NaN included) is rendered unchanged.
    """
    if isinstance(value, str):
        return value.replace('Unnamed:', '').replace('Name:', '')
    if isinstance(value, float) and value.is_integer():
        # NaN and infinities are not integers, so they fall through below.
        return str(int(value))
    return f'{value}'


def _format_row(row):
    """Return one table row as text, with every cell followed by ``' , '``.

    The text is built from the cell values instead of writing cleaned values
    back into ``row``: assigning an int into a float-typed Series casts it
    straight back to float, which silently undid the ``.0`` stripping for
    all-numeric rows.
    """
    return ''.join(f'{_format_cell(value)} , ' for value in row.tolist())


# Read the tables from the PDF with Tabula.
# pages='all' requests every page; multiple_tables=True allows several tables
# to be extracted in one call.
tables = tabula.read_pdf(file_path, pages='all', multiple_tables=True)

# Print every table row by row (the tables are iterated as pandas DataFrames),
# followed by the number of characters printed for that table.
for table in tables:
    str_num = 0
    for _, row in table.iterrows():
        line = _format_row(row)
        print(line)
        # Count the characters of the printed line, separators included.
        str_num += len(line)
    print(f"总字数: {str_num}")
